cap program drop import_all_subs_csv
program define import_all_subs_csv
	import delimited "$base_data_path/counterfactual_all_subscribers.csv", clear 
	save "$sumstats_data_path/all_markets_subscribers.dta", replace
end


cap program drop harmonize_subs_data
program define harmonize_subs_data
	use "$sumstats_data_path/all_markets_subscribers.dta", clear
	
	rename grossprem_old grossprem
	rename best_guess_subsidy_old best_guess_subsidy
	rename best_guess_av_old best_guess_av
	rename best_guess_netprem_old best_guess_netprem
	
	gen cost = totpaidmonth_span/100
	* Allow for missing replace
	destring best_guess_netprem , replace force
	destring grossprem , replace force
	destring best_guess_subsidy , replace force
	destring best_guess_av , replace force
	gen p = best_guess_netprem / 100
	
	gen grandfathered = (best_guess_metal=="0")
	gen missing_p =  (best_guess_netprem == . | grossprem == .)
	
	* Truncating cost
	*keep if cost >= 0
	replace cost = 0 if sum_concurrent_risk == 0
	local upper_winsor = 89.37 // manual from the est logs
	replace cost = `upper_winsor' if cost > `upper_winsor'

	* Useful demographic variables
	gen_bool married
	drop withkids
	gen withkids = (ndeps > 0)
	gen over50 = age > 50
	gen single_mem = (married==0)&(withkids==0)
	gen unmarried_wkids = (married==0)&withkids
	gen married_nokids = married&(withkids==0)
	gen married_wkids = (married&withkids)
	gen real_urban = inlist(best_guess_ra, 1,2,3) 
	gen ndeps_positive = ndeps if ndeps > 0
	gen cost_positive = cost if cost > 0	
	gen subsidized = (best_guess_subsidy > 0)
	gen all_subsidy = (subsidized==1) | (best_guess_av == 73 | best_guess_av == 87 | best_guess_av == 94)
	
	gen sg_vs_indiv = .
	replace sg_vs_indiv = 0 if (best_guess_market == 1) | (best_guess_market == 2)
	replace sg_vs_indiv = 1 if (best_guess_market == 4)
	drop if best_guess_market == 3
	
	label define sg_vs_indiv 0 "Individual" 1 "Small Group"
	label value sg_vs_indiv sg_vs_indiv
	
	destring best_guess_metal, force replace
	label define metal 0 "Missing" 1 "Catastrophic" 2 "Bronze" 3 "Silver" 4 "Gold" 5 "Platinum"
	label values  best_guess_metal metal

	gen est_sample = (grandfathered == 0) & (missing_p == 0)  & (inlist(best_guess_metal, 2,3,4)) & (payer_id != "M0099")
	
	save "$sumstats_data_path/harmonized_subs.dta", replace
	export delimited using "$sumstats_data_path/harmonized_subs.csv", replace
end


cap program drop harmonize_switchers_data
program define harmonize_switchers_data  
  import delimited "$base_data_path/forced_switchers.csv", clear
  gen complier = 1 if (tracked == 0 | in_samplelead == 1)
  keep subscriberid year in_samplelead complier
  gen switchers = 1
  gen insurance = 0 
  replace insurance = 1 if in_samplelead == 1
  save "$sumstats_data_path/forced_switchers.dta", replace
  
  clear
  use "$sumstats_data_path/harmonized_subs.dta"
  merge 1:1 subscriberid year using "$sumstats_data_path/forced_switchers.dta", keep(match)
  
	save "$sumstats_data_path/harmonized_switchers.dta", replace
end


cap program drop get_serff_data
program define get_serff_data
	import delimited "$base_data_path/orig/ConstructedPlanDat.csv", clear 
        drop if payer_id == "M0099"

	tostring year, replace
	gen constructed_plan_year = constrplanid  + "_" + year
	destring year, replace
	drop metal
	merge 1:m year constructed_plan_year using "$sumstats_data_path/harmonized_subs.dta"
	
	keep if _merge == 3
	gen count = 1
	collapse (mean) individual_rate (count) count, by(year payer_id best_guess_metal sg_vs_indiv)
	rename individual_rate p
	label value sg_vs_indiv sg_vs_indiv
	save "$sumstats_data_path/serff.dta", replace
end
